#perl;

#Overlap PCR Oligo Maker

## This script prompts the user for two DNA sequences to be joined by an overlap-based assembly method, then outputs the overlapping oligo (primer) sequences, together with the Tm of each annealing region, for PCR-amplifying the parts.

## Routines to add: BioBrick ID retrieval from registry dump, primer-dimer considerations.


use 5.010;
#use warnings;
use POSIX;


# ---------------------------------------------------------------------------
# Interactive driver: greet the user, read the two parts from STDIN, then
# print the reverse primer for the 5' part and the forward primer for the
# 3' part, each preceded by the Tm of its annealing region.
#
# The driver lives in a bare block so that strict/warnings and its working
# variables are scoped to the driver only.
# ---------------------------------------------------------------------------
{
    use strict;
    use warnings;

    # Number of bases taken from the junction-side end of each part.
    my $overlap = 20;

    # Print $prompt, read one line from STDIN and return it as a cleaned
    # sequence. Dies with a readable message on end-of-input, on an empty
    # line, or when anything other than A/C/G/T (either case) was entered.
    my $ask_for_part = sub {
        my ($prompt) = @_;

        print $prompt;
        my $sequence = <STDIN>;
        die "No sequence received (end of input).\n"
            unless defined $sequence;

        # Removes the trailing newline as well as any "\r", spaces or tabs
        # that came along with a pasted sequence.
        $sequence =~ s/\s+//g;

        die "Empty sequence; please enter a DNA sequence (A, C, G, T).\n"
            if $sequence eq '';
        die "Sequence may only contain the letters A, C, G and T.\n"
            unless $sequence =~ /\A[ACGTacgt]+\z/;

        return $sequence;
    };

    print "\n";
    say "Hi There! This script makes oligo sequences to use for Exonuclease/Repair Assembly Method \n\n If you give me the DNA sequence of your parts, I'll give you the oligos necessary for creating overlaps for your assembly. \n";

    #Parts from DNA (future version will address Biobrick IDs, integrate into biobrick inventory file and registry dump)
    my $fivepart  = $ask_for_part->("What is your 5' (left) part?\n");
    my $threepart = $ask_for_part->("What is your 3' (right) part?\n");

    for my $part ($fivepart, $threepart) {
        warn "Note: a part is shorter than $overlap nt; using the whole part for the overlap.\n"
            if length($part) < $overlap;
    }

    # Junction-side ends: the last bases of the 5' part and the first bases
    # of the 3' part. The tail length is clamped so the negative offset
    # never reaches past the start of a short part.
    my $tail_length   = length($fivepart) < $overlap ? length($fivepart) : $overlap;
    my $fivepartthree = substr($fivepart, -$tail_length);
    my $threepartfive = substr($threepart, 0, $overlap);

    my $fivepartthreeRC = reverse_complement($fivepartthree);
    my $threepartfiveRC = reverse_complement($threepartfive);

    # Each primer is the annealing region on its own part plus the
    # neighbouring part's end as the overlap.
    my $fivepartreverseprimer  = $threepartfiveRC . $fivepartthreeRC;
    my $threepartforwardprimer = $fivepartthree . $threepartfive;

    # The label is printed in its own statement before Tm() is called so
    # the output order does not depend on how Tm() produces its text.
    print "\n\n  Reverse primer for 5' Part:  ";
    print Tm($fivepartthreeRC);
    print "\n\n";
    print $fivepartreverseprimer;
    print "\n\n";

    print "  Forward primer for 3' part:  ";
    print Tm($threepartfive);
    print "\n\n";
    print $threepartforwardprimer;
    print "\n\n";

    print "There you go. Good luck with your assembly! \n\n";
}


# Return the reverse complement of a DNA sequence.
#
# Argument: the sequence as a string. A<->T and G<->C are swapped in both
#           upper and lower case; any other character is left unchanged.
# Returns:  the complemented sequence, read in the opposite direction.
#
# Works on a lexical copy, so the caller's $_ is not modified.
sub reverse_complement {
    my ($sequence) = @_;
    $sequence = '' unless defined $sequence;

    $sequence =~ tr/AGCTagct/TCGAtcga/;

    # "scalar" forces string reversal; in list context reverse would only
    # reorder a one-element list and hand the string back unreversed.
    return scalar reverse $sequence;
}


##Perl script to calculate Tm of oligos
## Nearest Neighbor Tm subroutines were courtesy of Dr. David Gresham, NYU

# Build the Tm report for one oligo.
#
# Argument: the oligo sequence; it is upper-cased here because the
#           nearest-neighbour tables are keyed on upper-case dinucleotides.
# Returns:  the text "Tm: <value>", where <value> is what calcTm() computes
#           from the summed enthalpy and entropy of the sequence.
#
# The text is returned rather than printed: callers write
# "print Tm(...)", and printing here as well made them emit print's own
# return value (a stray "1") right after the temperature.
sub Tm {

    my ($seq) = @_;
    $seq = uc(defined $seq ? $seq : '');

    my $meltingTemp = calcTm(calcNnEnthalpy($seq), calcNnEntropy($seq));

    return "Tm: $meltingTemp";

}

#######subroutine to calculate Tm based on total enthalpy and entropy
#
# Arguments:
#   $totalEnthalpy - summed enthalpy, as returned by calcNnEnthalpy()
#   $totalEntropy  - summed entropy, as returned by calcNnEntropy()
#   $Ct            - optional strand concentration; defaults to 0.0000000000006
#   $x             - optional duplex factor; defaults to 4
#                    (4 for a nonself-complementary duplex)
# Returns: enthalpy*1000 / (entropy + R*ln(Ct/x)) - 273.15
#   The factor 1000 presumably converts kcal to cal to match the units of R,
#   and 273.15 converts Kelvin to Celsius -- TODO confirm table units.
# Dies when $Ct or $x is not greater than zero (the logarithm is undefined).

sub calcTm {

    my ($totalEnthalpy, $totalEntropy, $Ct, $x) = @_;

    my $R = 1.9872; #gas constant cal/K-mol
    $x  = 4               unless defined $x;  #equals 4 for nonself-complementary duplex
    $Ct = 0.0000000000006 unless defined $Ct; #$Ct is strand concentration

    die "calcTm: strand concentration and duplex factor must be greater than zero\n"
        unless $Ct > 0 && $x > 0;

    my $Tm = ($totalEnthalpy * 1000) / ($totalEntropy + $R * log($Ct/$x)) - 273.15;

    return $Tm;

}



#######subroutine to calculate enthalpy by summing over nearest neighbors using the unified nearest neighbor values
#
# Argument: $dna - oligo sequence. The table is keyed on upper-case
#           dinucleotides, so the sequence should be upper-cased first.
# Returns:  an initiation value plus the table value of every overlapping
#           dinucleotide, read left to right. Dinucleotides that are not
#           in the table contribute nothing.
#
# The initiation value depends on how many of the two terminal bases are
# A or T: both -> 4.6, exactly one (either end) -> 2.4, neither -> 0.2.

sub calcNnEnthalpy {

    my $dna = shift(@_);
    $dna = '' unless defined $dna;

    my %enthalpies = (
        'AA' => -7.9,
        'TT' => -7.9,
        'AT' => -7.2,
        'TA' => -7.2,
        'CA' => -8.5,
        'TG' => -8.5,
        'GT' => -8.4,
        'AC' => -8.4,
        'CT' => -7.8,
        'AG' => -7.8,
        'GA' => -8.2,
        'TC' => -8.2,
        'CG' => -10.6,
        'GC' => -9.8,
        'GG' => -8.0,
        'CC' => -8.0
        );

    # Initiation value indexed by the number of terminal bases that are A/T.
    my @initiation = (0.2, 2.4, 4.6);

    # Look at the first and the last base separately; guarding on length
    # keeps substr from being asked for a base of an empty string.
    my $length = length($dna);
    my $firstBase = $length ? substr($dna, 0, 1) : '';
    my $lastBase  = $length ? substr($dna, -1)   : '';
    my $terminalAT = grep { $_ eq 'A' || $_ eq 'T' } ($firstBase, $lastBase);

    my $totalEnthalpy = $initiation[$terminalAT];

    #calculate total enthalpy based on each NN for oligo
    for my $i (0 .. $length - 2) {
        my $pair = substr($dna, $i, 2);
        $totalEnthalpy += $enthalpies{$pair} if exists $enthalpies{$pair};
    }

    return $totalEnthalpy;

}

#####subroutine to calculate entropy by summing over nearest neighbors using the nearest neighbor parameters
#
# Argument: $dna - oligo sequence. The table is keyed on upper-case
#           dinucleotides, so the sequence should be upper-cased first.
# Returns:  an initiation value plus the table value of every overlapping
#           dinucleotide, read left to right. Dinucleotides that are not
#           in the table contribute nothing.
#
# The initiation value depends on how many of the two terminal bases are
# A or T: both -> 8.1, exactly one (either end) -> 1.2, neither -> -5.7.

sub calcNnEntropy {

    my $dna = shift(@_);
    $dna = '' unless defined $dna;

    my %entropies = (
        'AA' => -22.2,
        'TT' => -22.2,
        'AT' => -20.4,
        'TA' => -21.3,
        'CA' => -22.7,
        'TG' => -22.7,
        'GT' => -22.4,
        'AC' => -22.4,
        'CT' => -21.0,
        'AG' => -21.0,
        'GA' => -22.2,
        'TC' => -22.2,
        'CG' => -27.2,
        'GC' => -24.4,
        'GG' => -19.9,
        'CC' => -19.9
        );

    # Initiation value indexed by the number of terminal bases that are A/T.
    my @initiation = (-5.7, 1.2, 8.1);

    # Look at the first and the last base separately; guarding on length
    # keeps substr from being asked for a base of an empty string.
    my $length = length($dna);
    my $firstBase = $length ? substr($dna, 0, 1) : '';
    my $lastBase  = $length ? substr($dna, -1)   : '';
    my $terminalAT = grep { $_ eq 'A' || $_ eq 'T' } ($firstBase, $lastBase);

    my $totalEntropy = $initiation[$terminalAT];

    #add together entropies for each NN in string
    for my $i (0 .. $length - 2) {
        my $pair = substr($dna, $i, 2);
        $totalEntropy += $entropies{$pair} if exists $entropies{$pair};
    }

    return $totalEntropy;

}
#######subroutine to calculate deltaG at 37C by summing over nearest neighbors using the unified nearest neighbors values
#
# Argument: $dna - oligo sequence. The table is keyed on upper-case
#           dinucleotides, so the sequence should be upper-cased first.
# Returns:  an initiation value plus the table value of every overlapping
#           dinucleotide, read left to right. Dinucleotides that are not
#           in the table contribute nothing.
#
# The initiation value depends on how many of the two terminal bases are
# A or T: both -> 2.06, exactly one (either end) -> 2.01, neither -> 1.96.

sub calcDeltaG37 {

    my $dna = shift(@_);
    $dna = '' unless defined $dna;

    my %deltaG37 = (
        'AA' => -1.0,
        'TT' => -1.0,
        'AT' => -0.88,
        'TA' => -0.58,
        'CA' => -1.45,
        'TG' => -1.45,
        'GT' => -1.44,
        'AC' => -1.44,
        'CT' => -1.28,
        'AG' => -1.28,
        'GA' => -1.30,
        'TC' => -1.30,
        'CG' => -2.17,
        'GC' => -2.24,
        'GG' => -1.84,
        'CC' => -1.84
        );

    # Initiation value indexed by the number of terminal bases that are A/T.
    my @initiation = (1.96, 2.01, 2.06);

    # Look at the first and the last base separately; guarding on length
    # keeps substr from being asked for a base of an empty string.
    my $length = length($dna);
    my $firstBase = $length ? substr($dna, 0, 1) : '';
    my $lastBase  = $length ? substr($dna, -1)   : '';
    my $terminalAT = grep { $_ eq 'A' || $_ eq 'T' } ($firstBase, $lastBase);

    my $totalDeltaG37 = $initiation[$terminalAT];

    for my $i (0 .. $length - 2) {
        my $pair = substr($dna, $i, 2);
        $totalDeltaG37 += $deltaG37{$pair} if exists $deltaG37{$pair};
    }

    return $totalDeltaG37;

}

##Thanks for checking out what's under the hood. If you have any questions feel free to email me - red272 at nyu.edu